Cargando paquetes
# Project data directories, resolved relative to the project root via here().
raw_data <- here("data", "raw")
interim_data <- here("data", "interim")
processed_data <- here("data", "processed")

# Load the final modelling table from the processed-data directory.
base <- readRDS(file = file.path(processed_data, "base_final.Rds"))
# Derive the classification target from the raw `objetivo` flag:
# objetivo == 0 -> "bueno", anything else -> "malo".
# The target is stored as a factor with explicit levels so that the class
# order does not depend on the order in which values appear in the data.
base <- base %>%
  mutate(
    buen_pagador = factor(
      if_else(objetivo == 0, "bueno", "malo"),
      levels = c("bueno", "malo")
    )
  )
head(base)

# The raw `objetivo` column cannot be used directly as the target:
# TaskClassif rejects it with
#   "Target column 'objetivo' must have at least two levels".
# The task is therefore built once, on the derived `buen_pagador` column.
# NOTE(review): for a binary task the first factor level ("bueno") is the
# default positive class, which is what classif.fnr is computed against —
# confirm this is the intended class of interest.
task_tarj <- TaskClassif$new(
  id = "tarjetas",
  backend = base,
  target = "buen_pagador"
)

# `buen_pagador` is a deterministic function of `objetivo`; leaving `objetivo`
# among the features would leak the target into every model, so drop it.
task_tarj$select(setdiff(task_tarj$feature_names, "objetivo"))

# `$select()` mutates a task in place, so the pairs plot works on a clone and
# the modelling task keeps its full feature set.
autoplot(
  task_tarj$clone()$select(
    c("saldo_tarjeta", "coeficiente_solvencia", "edad", "limite_tarjeta_credito")
  ),
  type = "pairs"
)
# Base learners compared in the benchmark. Every learner is created with the
# same settings: probability predictions, kept for both the train and the
# test set.
prob_learner <- function(key) {
  lrn(key, predict_type = "prob", predict_sets = c("train", "test"))
}

lrn_rpart <- prob_learner("classif.rpart")
lrn_glmnet <- prob_learner("classif.glmnet")
lrn_knn <- prob_learner("classif.kknn")
lrn_lda <- prob_learner("classif.lda")
lrn_log_reg <- prob_learner("classif.log_reg")
lrn_bayes <- prob_learner("classif.naive_bayes")
lrn_rf <- prob_learner("classif.ranger")
lrn_svm <- prob_learner("classif.svm")
lrn_xgboost <- prob_learner("classif.xgboost")
# Reproducible 80/20 hold-out split over the row indices of the task.
set.seed(34678)
n_obs <- task_tarj$nrow
train_set <- sample(n_obs, size = 0.8 * n_obs)
# Every row index that was not drawn for training goes to the test set.
test_set <- setdiff(seq_len(n_obs), train_set)
# Preprocessing steps shared by all learners. Each step is restricted to the
# column type it is meant for through `affect_columns`.
impute_fcts <- po(
  "imputemode",
  affect_columns = selector_type("factor")
)
impute_nums <- po(
  "imputehist",
  affect_columns = selector_type("numeric")
)
encode <- po(
  "encode",
  affect_columns = selector_type("factor")
)

# Chain the steps: impute factors, impute numerics, then encode factors.
# NOTE(review): the "variables are collinear" / "rank-deficient fit" warnings
# in the benchmark log may be caused by how factors are encoded here — confirm
# whether a different encoding method is preferable for the linear learners.
pre_procesamiento <- impute_fcts %>>% impute_nums %>>% encode
# Wrap every base learner behind the shared preprocessing graph. For each
# resulting GraphLearner, keep predictions on both train and test sets and
# assign the short label that is shown in the benchmark logs and results.

# Decision tree
lrn_rpart <- GraphLearner$new(pre_procesamiento %>>% po(lrn_rpart))
lrn_rpart$predict_sets <- c("train", "test")
lrn_rpart$id <- "Árbol"

# Regularised regression (glmnet)
lrn_glmnet <- GraphLearner$new(pre_procesamiento %>>% po(lrn_glmnet))
lrn_glmnet$predict_sets <- c("train", "test")
lrn_glmnet$id <- "Reg-reg"

# k-nearest neighbours
lrn_knn <- GraphLearner$new(pre_procesamiento %>>% po(lrn_knn))
lrn_knn$predict_sets <- c("train", "test")
lrn_knn$id <- "K-vecinos"

# Linear discriminant analysis
lrn_lda <- GraphLearner$new(pre_procesamiento %>>% po(lrn_lda))
lrn_lda$predict_sets <- c("train", "test")
lrn_lda$id <- "LDA"

# Logistic regression
lrn_log_reg <- GraphLearner$new(pre_procesamiento %>>% po(lrn_log_reg))
lrn_log_reg$predict_sets <- c("train", "test")
lrn_log_reg$id <- "Reg-log"

# Naive Bayes
lrn_bayes <- GraphLearner$new(pre_procesamiento %>>% po(lrn_bayes))
lrn_bayes$predict_sets <- c("train", "test")
lrn_bayes$id <- "Bayes"

# Random forest (ranger)
lrn_rf <- GraphLearner$new(pre_procesamiento %>>% po(lrn_rf))
lrn_rf$predict_sets <- c("train", "test")
lrn_rf$id <- "RandomForest"

# Support vector machine
lrn_svm <- GraphLearner$new(pre_procesamiento %>>% po(lrn_svm))
lrn_svm$predict_sets <- c("train", "test")
lrn_svm$id <- "SVM"

# Gradient boosting (xgboost)
lrn_xgboost <- GraphLearner$new(pre_procesamiento %>>% po(lrn_xgboost))
lrn_xgboost$predict_sets <- c("train", "test")
lrn_xgboost$id <- "XGBoost"
# All learners that take part in the benchmark, in the order they were defined.
learners <- list(
  lrn_rpart, lrn_glmnet, lrn_knn,
  lrn_lda, lrn_log_reg, lrn_bayes,
  lrn_rf, lrn_svm, lrn_xgboost
)
# 10-fold cross-validation, applied to every learner on the credit-card task.
resamplings <- rsmp("cv", folds = 10)
design <- benchmark_grid(task_tarj, learners, resamplings)

# Run the benchmark in parallel background R sessions. The "multiprocess"
# plan is deprecated in the future package; "multisession" is the backend it
# already fell back to when running from RStudio, so request it explicitly.
future::plan("multisession")
[ONE-TIME WARNING] Forked processing ('multicore') is disabled in future (>= 1.13.0) when running R from RStudio, because it is considered unstable. Because of this, plan("multicore") will fall back to plan("sequential"), and plan("multiprocess") will fall back to plan("multisession") - not plan("multicore") as in the past. For more details, how to control forked processing or not, and how to silence this warning in future R sessions, see ?future::supportsMulticore
# Execute every learner/resampling combination of the design and keep the
# resulting benchmark object for scoring.
bmr <- benchmark(design = design)
INFO [13:51:43.131] Benchmark with 90 resampling iterations
INFO [13:51:47.851] Applying learner 'SVM' on task 'tarjetas' (iter 5/10)
INFO [14:03:56.824] Applying learner 'XGBoost' on task 'tarjetas' (iter 9/10)
INFO [14:04:12.361] Applying learner 'RandomForest' on task 'tarjetas' (iter 1/10)
INFO [14:05:02.021] Applying learner 'Reg-reg' on task 'tarjetas' (iter 8/10)
INFO [14:05:09.841] Applying learner 'LDA' on task 'tarjetas' (iter 4/10)
INFO [14:05:15.472] Applying learner 'XGBoost' on task 'tarjetas' (iter 5/10)
INFO [14:05:22.733] Applying learner 'SVM' on task 'tarjetas' (iter 2/10)
INFO [14:09:07.831] Applying learner 'LDA' on task 'tarjetas' (iter 9/10)
INFO [14:09:09.914] Applying learner 'RandomForest' on task 'tarjetas' (iter 7/10)
INFO [14:09:25.559] Applying learner 'Reg-reg' on task 'tarjetas' (iter 5/10)
INFO [14:09:27.877] Applying learner 'K-vecinos' on task 'tarjetas' (iter 9/10)
INFO [14:09:53.303] Applying learner 'Árbol' on task 'tarjetas' (iter 6/10)
INFO [14:09:55.347] Applying learner 'LDA' on task 'tarjetas' (iter 10/10)
INFO [14:09:57.423] Applying learner 'LDA' on task 'tarjetas' (iter 1/10)
INFO [14:09:59.604] Applying learner 'RandomForest' on task 'tarjetas' (iter 5/10)
INFO [14:10:16.323] Applying learner 'Reg-log' on task 'tarjetas' (iter 6/10)
INFO [14:10:20.691] Applying learner 'Reg-log' on task 'tarjetas' (iter 7/10)
INFO [14:10:24.748] Applying learner 'LDA' on task 'tarjetas' (iter 2/10)
INFO [14:10:26.956] Applying learner 'Árbol' on task 'tarjetas' (iter 10/10)
INFO [14:10:29.972] Applying learner 'Árbol' on task 'tarjetas' (iter 8/10)
INFO [14:10:32.253] Applying learner 'SVM' on task 'tarjetas' (iter 10/10)
INFO [14:18:40.391] Applying learner 'LDA' on task 'tarjetas' (iter 8/10)
INFO [14:18:44.308] Applying learner 'RandomForest' on task 'tarjetas' (iter 2/10)
variables are collinearvariables are collinearvariables are collinearvariables are collinearprediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleadingvariables are collinearvariables are collinear
INFO [13:51:53.174] Applying learner 'LDA' on task 'tarjetas' (iter 7/10)
INFO [13:52:03.373] Applying learner 'K-vecinos' on task 'tarjetas' (iter 4/10)
INFO [13:53:19.560] Applying learner 'Reg-reg' on task 'tarjetas' (iter 3/10)
INFO [13:53:30.434] Applying learner 'RandomForest' on task 'tarjetas' (iter 4/10)
INFO [13:54:20.423] Applying learner 'SVM' on task 'tarjetas' (iter 8/10)
INFO [14:05:30.140] Applying learner 'LDA' on task 'tarjetas' (iter 6/10)
INFO [14:05:32.712] Applying learner 'K-vecinos' on task 'tarjetas' (iter 7/10)
INFO [14:05:55.551] Applying learner 'K-vecinos' on task 'tarjetas' (iter 8/10)
INFO [14:06:21.490] Applying learner 'Árbol' on task 'tarjetas' (iter 4/10)
INFO [14:06:24.828] Applying learner 'XGBoost' on task 'tarjetas' (iter 10/10)
INFO [14:06:28.969] Applying learner 'Reg-reg' on task 'tarjetas' (iter 1/10)
INFO [14:06:31.656] Applying learner 'Bayes' on task 'tarjetas' (iter 9/10)
INFO [14:06:53.035] Applying learner 'XGBoost' on task 'tarjetas' (iter 4/10)
INFO [14:06:55.223] Applying learner 'Bayes' on task 'tarjetas' (iter 4/10)
INFO [14:07:15.253] Applying learner 'SVM' on task 'tarjetas' (iter 3/10)
INFO [14:11:35.424] Applying learner 'Bayes' on task 'tarjetas' (iter 7/10)
INFO [14:12:34.694] Applying learner 'K-vecinos' on task 'tarjetas' (iter 10/10)
INFO [14:13:50.922] Applying learner 'Reg-reg' on task 'tarjetas' (iter 10/10)
INFO [14:13:58.568] Applying learner 'RandomForest' on task 'tarjetas' (iter 3/10)
INFO [14:14:43.545] Applying learner 'XGBoost' on task 'tarjetas' (iter 6/10)
INFO [14:14:48.297] Applying learner 'Reg-reg' on task 'tarjetas' (iter 7/10)
INFO [14:14:55.788] Applying learner 'XGBoost' on task 'tarjetas' (iter 7/10)
variables are collinearvariables are collinear
INFO [13:51:58.165] Applying learner 'Reg-log' on task 'tarjetas' (iter 3/10)
INFO [13:52:10.773] Applying learner 'K-vecinos' on task 'tarjetas' (iter 6/10)
INFO [13:53:35.219] Applying learner 'Árbol' on task 'tarjetas' (iter 5/10)
INFO [13:53:44.797] Applying learner 'SVM' on task 'tarjetas' (iter 6/10)
INFO [14:05:35.308] Applying learner 'Árbol' on task 'tarjetas' (iter 3/10)
INFO [14:05:37.897] Applying learner 'SVM' on task 'tarjetas' (iter 1/10)
INFO [14:09:07.735] Applying learner 'Reg-log' on task 'tarjetas' (iter 10/10)
INFO [14:09:10.470] Applying learner 'Bayes' on task 'tarjetas' (iter 2/10)
INFO [14:09:34.768] Applying learner 'Reg-log' on task 'tarjetas' (iter 1/10)
INFO [14:09:37.510] Applying learner 'Árbol' on task 'tarjetas' (iter 1/10)
INFO [14:09:39.897] Applying learner 'K-vecinos' on task 'tarjetas' (iter 2/10)
INFO [14:10:10.328] Applying learner 'Árbol' on task 'tarjetas' (iter 9/10)
INFO [14:10:12.726] Applying learner 'RandomForest' on task 'tarjetas' (iter 6/10)
INFO [14:10:30.708] Applying learner 'Bayes' on task 'tarjetas' (iter 1/10)
INFO [14:10:52.541] Applying learner 'SVM' on task 'tarjetas' (iter 7/10)
INFO [14:19:23.859] Applying learner 'Bayes' on task 'tarjetas' (iter 8/10)
INFO [14:19:53.067] Applying learner 'Reg-reg' on task 'tarjetas' (iter 9/10)
INFO [14:19:57.209] Applying learner 'RandomForest' on task 'tarjetas' (iter 8/10)
INFO [14:20:22.666] Applying learner 'LDA' on task 'tarjetas' (iter 5/10)
INFO [14:20:26.858] Applying learner 'Reg-log' on task 'tarjetas' (iter 9/10)
INFO [14:20:31.870] Applying learner 'K-vecinos' on task 'tarjetas' (iter 1/10)
INFO [14:21:03.157] Applying learner 'Reg-log' on task 'tarjetas' (iter 5/10)
prediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleadingvariables are collinearprediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleading
INFO [13:52:07.032] Applying learner 'Bayes' on task 'tarjetas' (iter 3/10)
INFO [13:53:20.128] Applying learner 'K-vecinos' on task 'tarjetas' (iter 3/10)
INFO [13:54:54.520] Applying learner 'Árbol' on task 'tarjetas' (iter 7/10)
INFO [13:55:02.302] Applying learner 'Reg-reg' on task 'tarjetas' (iter 6/10)
INFO [13:55:14.226] Applying learner 'RandomForest' on task 'tarjetas' (iter 10/10)
INFO [13:56:02.095] Applying learner 'Bayes' on task 'tarjetas' (iter 6/10)
INFO [13:57:16.564] Applying learner 'SVM' on task 'tarjetas' (iter 4/10)
INFO [14:06:19.630] Applying learner 'RandomForest' on task 'tarjetas' (iter 9/10)
INFO [14:06:35.774] Applying learner 'XGBoost' on task 'tarjetas' (iter 3/10)
INFO [14:06:38.084] Applying learner 'Árbol' on task 'tarjetas' (iter 2/10)
INFO [14:06:40.168] Applying learner 'K-vecinos' on task 'tarjetas' (iter 5/10)
INFO [14:07:04.088] Applying learner 'XGBoost' on task 'tarjetas' (iter 8/10)
INFO [14:07:06.284] Applying learner 'Reg-log' on task 'tarjetas' (iter 2/10)
INFO [14:07:08.776] Applying learner 'SVM' on task 'tarjetas' (iter 9/10)
INFO [14:11:32.237] Applying learner 'LDA' on task 'tarjetas' (iter 3/10)
INFO [14:11:39.076] Applying learner 'XGBoost' on task 'tarjetas' (iter 2/10)
INFO [14:11:45.433] Applying learner 'Reg-reg' on task 'tarjetas' (iter 2/10)
INFO [14:11:54.250] Applying learner 'Reg-reg' on task 'tarjetas' (iter 4/10)
INFO [14:12:00.431] Applying learner 'Reg-log' on task 'tarjetas' (iter 4/10)
INFO [14:12:07.894] Applying learner 'Bayes' on task 'tarjetas' (iter 10/10)
INFO [14:13:10.240] Applying learner 'Reg-log' on task 'tarjetas' (iter 8/10)
INFO [14:13:20.046] Applying learner 'XGBoost' on task 'tarjetas' (iter 1/10)
INFO [14:13:28.380] Applying learner 'Bayes' on task 'tarjetas' (iter 5/10)
prediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleadingvariables are collinearprediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleadingprediction from a rank-deficient fit may be misleading
INFO [14:21:08.923] Finished benchmark
# Performance measures: AUC, accuracy and false-negative rate, each scored
# once on the training predictions (`*_train`) and once on the default
# prediction set (`*_test`). The resulting order is
# auc_train, auc_test, acc_train, acc_test, fnr_train, fnr_test.
measures <- do.call(
  c,
  lapply(c("auc", "acc", "fnr"), function(metric) {
    key <- paste0("classif.", metric)
    list(
      msr(key, id = paste0(metric, "_train"), predict_sets = "train"),
      msr(key, id = paste0(metric, "_test"))
    )
  })
)

# Aggregate each measure over the resampling folds for every learner.
bmr$aggregate(measures)